Data input

if (!dir.exists("Spf66_supplementary_fits")) dir.create("Spf66_supplementary_fits")
if (!dir.exists("Spf66_supplementary_fits/Sporozoite_ratio")) dir.create("Spf66_supplementary_fits/Sporozoite_ratio")
if (!dir.exists("Spf66_supplementary_fits/Age_stratified_FOI")) dir.create("Spf66_supplementary_fits/Age_stratified_FOI")

RUN_ALL <- FALSE # flag whether or not to re-run fitting

# source code for model fitting
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")
source("Model_calibration_code/simulate_clinical_recurrences.R")

# pre-processed data from SPf66 cohort for model fitting
patient_metadata <- read_rds("Spf66_data_cleaned/patient_metadata.rds")
inf_states_by_VIN <- read_rds("Spf66_data_processed/discretised_infection_matrix.rds")
SEASONALITY <- read_rds("Spf66_data_processed/seasonality_vector.rds") # inferred from Pf

N_COHORT <- nrow(patient_metadata)
keep_VIN <- as.character(patient_metadata$VIN)

# results for baseline model fitting
MCMC_results_main <- read_rds("Spf66_calibration/MCMC_results_main.rds")

# discretisation into uniform windows 
START_DATE <- as.Date("1993-10-01")
END_DATE <- as.Date("1995-07-15")
STUDY_DURATION <- as.numeric(difftime(END_DATE, START_DATE, unit="days"))
TIME_STEP <- 10
N_OBS <- STUDY_DURATION%/%10
SHIFT_WINDOW <- 20 # estimate Pv FOI separately before and after this window
YEAR_IN_TIMESTEP <- 365%/%TIME_STEP
THRESHOLD_AGE <- 2*YEAR_IN_TIMESTEP # possible stratification in FORI above/below this age

# fixed covariates
PREL_BASELINE <- 0.4
N_AGES <- patient_metadata[names(inf_states_by_VIN), "AGE"]*YEAR_IN_TIMESTEP # in units of TIME_STEP
names(N_AGES) <- names(inf_states_by_VIN)
MIN_AGE <- min(N_AGES)
MAX_AGE <- max(N_AGES)

# number of chains and iterations for MCMC (nested parallelisation over chains)
N_CHAINS <- 4
N_ITER <- 100000
BURNIN_PROP <- 0.2
N_CORES_PER_CHAIN <- 8

# parameters for Metropolis-Hastings proposal
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000
LOGIT_RHO_PROP_SD <- 0.05
LOG_GAMMA_PROP_SD <- 0.05

# hyperparameters for informative priors
LOG_GAMMA_PRIOR_SD <- 0.6
LOGIT_RHO_PRIOR_SD <- 0.7

# parameters for posterior predictive data
N_POSTERIOR_PREDICTIVE_DATASETS <- 2000
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2

# observed data for posterior predictive checking
masking_matrix <- read_rds("Spf66_data_processed/discretised_masking_matrix.rds")
observed_incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds") 

# formatting
POSTERIOR_COL <- "#cf7940"
OBSERVED_COL <- "#4878b8"

Misspecification of sporozoite fating probability

The ratio of hypnozoites to immediately-developing forms in the baseline model is informed by in vivo estimates for the Chesson strain, which may not be representative for Southeast Asian strains. We thus perform a sensitivity analysis over the sporozoite ratio.

PREL_VALS <- c(0.1, 0.25, 0.6, 0.75, 0.9)
prel_seeds <- c("11061897", "19121927", "22101900", "19121927", "03101924")

MCMC_results_prel <- list()

for (n in 1:length(PREL_VALS)) {
  if (!file.exists(paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))) {
    
  set.seed(prel_seeds[n])

  # rescale LAMBDA2 to be the mean FOI in the second stage of the study
  MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_VALS[n], N_OBS, TIME_STEP,
                                    SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, 1,
                                    N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
                                    LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
                                    LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
                                    LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
      mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
  
    write_rds(bind_cols(MCMC_results, PREL=PREL_VALS[n]), 
              paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))
    
    MCMC_results_prel[[n]] <- bind_cols(MCMC_results, PREL=PREL_VALS[n])
  
  } else {
    
    MCMC_results_prel[[n]] <- read_rds(paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))
    
  }
}

MCMC_results_prel <- append(MCMC_results_prel, list(bind_cols(MCMC_results_main, PREL=PREL_BASELINE))) %>%
  lapply(function(x) x %>% subset(ITER>BURNIN_PROP*max(x$ITER)))

Age stratification in the force of inoculation

The baseline model is predicated on the assumption that there is no age-stratification in the force of inoculation between birth and age 15. The assumption of homogenoeity between ages 2 and 15 is informed by the absence of clear age strucuture in the incidence of symptomatic falciparum malaria in the SPf66 trial. However, it is plausible that behavioural differences mitigated the risk of exposure between birth and age 2. We thus re-fit the model, assuming that the force of inoculation between birth and age 2 is dampened by a fixed factor U.

U_VALS <- c(0.1, 0.25, 0.5, 0.75)
age_foi_seeds <- c("07101907", "15101999", "23071906", "27021931")

MCMC_results_age_foi <- list()

for (n in 1:length(U_VALS)) {
  if (RUN_ALL || !file.exists(paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))) {
    
  set.seed(age_foi_seeds[n])

  # rescale LAMBDA2 to be the mean FOI in the second stage of the study
  MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_BASELINE, N_OBS, TIME_STEP,
                                    SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, U_VALS[n],
                                    N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
                                    LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
                                    LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
                                    LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
      mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
  
    write_rds(bind_cols(MCMC_results, U=U_VALS[n]), 
              paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))
    
    MCMC_results_age_foi[[n]] <- bind_cols(MCMC_results, U=U_VALS[n])
  
  } else {
    
    MCMC_results_age_foi[[n]] <- read_rds(paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))
    
  }
}

MCMC_results_age_foi <- append(MCMC_results_age_foi, list(bind_cols(MCMC_results_main, U=1))) %>%
  lapply(function(x) x %>% subset(ITER>BURNIN_PROP*max(x$ITER)))

We also assess whether the assumed model of age-stratification in the force of inoculation can better recaptiulate non-monotonic age structure in the incidence of symptomatic vivax malaria.

posterior_predictive_age_foi <- lapply(MCMC_results_age_foi, function(x) {
  posterior_draws <- x[sample(1:nrow(x), N_POSTERIOR_PREDICTIVE_DATASETS),]
    
  posterior_predictive_cohorts <- 
    lapply(1:N_POSTERIOR_PREDICTIVE_DATASETS, function(i) {
      simulate_dataset(time_dependent_fori(posterior_draws[i, "LAMBDA1"], 
                                           posterior_draws[i, "LAMBDA2"]/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]), 
                                           SHIFT_WINDOW, SEASONALITY),
                       TIME_STEP, N_AGES, N_OBS, posterior_draws[i, "NU"], 1,
                       posterior_draws[i, "ETA"], PREL_BASELINE, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW,
                       calculate_pclin(N_AGES, posterior_draws[i, "LOGIT_RHO"], 
                                       posterior_draws[i, "LOG_GAMMA"], MIN_AGE, MAX_AGE),
                       masking_matrix, posterior_draws[i, "U"], MIN_AGE)})
  
  return(list(sim_data=posterior_predictive_cohorts, U=posterior_draws[1, "U"]))})

We focus on the incidence rate, computed as the quotient of the total number of symptomatic vivax episodes and the cumulative duration of clinical follow-up (adjusted for left- and right-censoring, documented absences from the camp and post-exposure prophylaxis) for each age group.

simulated_incidence_rate <- lapply(posterior_predictive_age_foi, function(sim_cohorts) {
  sapply(sim_cohorts[["sim_data"]], function(x) {
    split(x, patient_metadata[rownames(x), "AGE"]) %>% 
      sapply(function(y) sum(y==1, na.rm = TRUE)/sum(!is.na(y))*365%/%TIME_STEP)}) %>% 
    apply(1, function(x) quantile(x, c(0.025, 0.5, 0.975))) %>% 
    t %>% as.data.frame %>% 
    mutate(AGE=as.numeric(rownames(.)), U=sim_cohorts[["U"]],
           FORI_label=factor(paste0("FOI under age of 2\ndampened by factor u=", U), 
                           levels=paste0("FOI under age of 2\ndampened by factor u=", c(U_VALS, 1))),
           source="Posterior predicted")}) %>%
  bind_rows()

N_BOOTSTRAP_REPLICATES <- 2000
observed_incidence_bootstrap <- lapply(1:N_BOOTSTRAP_REPLICATES, function(i) {
  observed_incidence_by_indiv[sample(1:N_COHORT, N_COHORT, replace=T),]})

observed_incidence_rate <- lapply(observed_incidence_bootstrap, function(x) {
  x %>% group_by(AGE) %>% summarise(mean_incidence=sum(N_Pv)/sum(Followup_Pv))}) %>% 
  bind_rows(.id="iter") %>%
  group_by(AGE) %>% 
  summarise(`2.5%`=quantile(mean_incidence, 0.025),
            `50%`=quantile(mean_incidence, 0.5),
            `97.5%`=quantile(mean_incidence, 0.975)) %>%
  mutate(source="Observed")